source("../GAA-EVAL.R")
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'pROC' was built under R version 3.4.4
## Warning: package 'reshape2' was built under R version 3.4.3
## Warning: package 'ggrepel' was built under R version 3.4.4
## Warning: package 'gmodels' was built under R version 3.4.4
exp.data <- read.RealData(file = "exp_data.csv", sep = "\t",
col.id = 1, col.value = 2, col.sd = 3,na.character = 'NA')
length(exp.data$value)
exp.data.s <- read.RealData(file = "exp_data_s.csv", sep = ",",
col.id = 1, col.value = 2, col.sd = 3,na.character = 'NA')
head(exp.data.s$value)
sub.data <- read.Submission.Folder(folder.name = "prediction/",col.id = 1,
col.value = 2, col.sd = 3, real.data = exp.data)
sub.data.s = read.Submission.Folder(folder.name = "prediction/",col.id = 1,
col.value = 2, col.sd = 3, real.data = exp.data.s)
sub.data = addGroup(sub.data,c(1,1,2,3,3,4,4,5,6,6,6,6,7,8,9,9))
sub.data.s = addGroup(sub.data.s,c(1,1,2,3,3,4,4,5,6,6,6,6,7,8,9,9))
plot_all_scatter(real.data = exp.data, pred.data = sub.data, z.transform = TRUE)
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4484 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
# 1. Render coefficient value
result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson", sd.use = NA,z.transform = TRUE)
head(result.cor.pearson)
## pearson.coefficient.n=219 p.value
## Group_39-prediction_file-1 0.3165561 2.169579e-06
## Group_39-prediction_file-2 0.3123934 2.992671e-06
## Group_40-prediction_file-1 0.2796749 3.178925e-05
## Group_41-prediction_file-1 0.3675506 2.799291e-08
## Group_41-prediction_file-2 0.3675506 2.799291e-08
## Group_42-prediction_file-1 0.1334869 5.062460e-02
# 2. Plot Correlation
plot.Correlation(result.cor.pearson, "Pearson")
result.rmsd1 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = TRUE)
head(result.rmsd1)
## RMSD
## Group_39-prediction_file-1 1.0685672
## Group_39-prediction_file-2 0.9949796
## Group_40-prediction_file-1 1.1218989
## Group_41-prediction_file-1 1.5450756
## Group_41-prediction_file-2 1.5450756
## Group_42-prediction_file-1 1.6199197
plot.RMSD(result.rmsd1, method="")
# with variance.normalization
result.rmsd2 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = TRUE)
head(result.rmsd2)
## RMSD
## Group_39-prediction_file-1 1.094220
## Group_39-prediction_file-2 1.018284
## Group_40-prediction_file-1 1.138296
## Group_41-prediction_file-1 1.569258
## Group_41-prediction_file-2 1.569258
## Group_42-prediction_file-1 1.617173
plot.RMSD(result.rmsd2, method="")
# with variance.normalization
result.rmsd3 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = FALSE)
head(result.rmsd3)
## RMSD
## Group_39-prediction_file-1 0.7423798
## Group_39-prediction_file-2 0.7086982
## Group_40-prediction_file-1 0.6115521
## Group_41-prediction_file-1 0.5694153
## Group_41-prediction_file-2 0.5694153
## Group_42-prediction_file-1 0.6404718
plot.RMSD(result.rmsd3, method="")
# with variance.normalization
result.rmsd4 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = FALSE)
head(result.rmsd4)
## RMSD
## Group_39-prediction_file-1 0.7602017
## Group_39-prediction_file-2 0.7252975
## Group_40-prediction_file-1 0.6204900
## Group_41-prediction_file-1 0.5783273
## Group_41-prediction_file-2 0.5783273
## Group_42-prediction_file-1 0.6393859
plot.RMSD(result.rmsd4, method="")
result.auc.0.4 <- eval.AUC(real.data = exp.data.s, pred.data = sub.data.s,
threshold = 0.4,z.transform = T)
## [1] "Group_39-prediction_file-1"
## [1] "Group_39-prediction_file-2"
## [1] "Group_40-prediction_file-1"
## [1] "Group_41-prediction_file-1"
## [1] "Group_41-prediction_file-2"
## [1] "Group_42-prediction_file-1"
## [1] "Group_42-prediction_file-2"
## [1] "Group_43-prediction_file-1"
## [1] "Group_44-prediction_file-1"
## [1] "Group_44-prediction_file-2"
## [1] "Group_44-prediction_file-3"
## [1] "Group_44-prediction_file-4"
## [1] "Group_45-prediction_file-1"
## [1] "Group_46-prediction_file-1-late"
## [1] "Group_47-prediction_file-1"
## [1] "Group_47-prediction_file-2"
head(result.auc.0.4$results)
## AUC.0.4 sensitivity specificity accuracy
## Group_39-prediction_file-1 0.6968 0.5616 0.7254 0.6698
## Group_39-prediction_file-2 0.6938 0.5068 0.7465 0.6651
## Group_40-prediction_file-1 0.6953 0.3836 0.7746 0.6419
## Group_41-prediction_file-1 0.6753 0.7534 0.4789 0.5721
## Group_41-prediction_file-2 0.6753 0.7534 0.4789 0.5721
## Group_42-prediction_file-1 0.5918 0.3836 0.7113 0.6000
## precision f1_score bAccu
## Group_39-prediction_file-1 0.5125 0.5359 0.6435
## Group_39-prediction_file-2 0.5068 0.5068 0.6267
## Group_40-prediction_file-1 0.4667 0.4211 0.5791
## Group_41-prediction_file-1 0.4264 0.5446 0.6161
## Group_41-prediction_file-2 0.4264 0.5446 0.6161
## Group_42-prediction_file-1 0.4058 0.3944 0.5474
plot.AUC(result.auc.0.4)
# for all the submission files
result.bM.pearson <-eval.Correlation.Between(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson",sd.use = NA,z.transform = TRUE)
plot.Correlation.Between(result.bM.pearson$coefficient, method="pearson")
# for best submission of each group
result.bM.pearson <-eval.Correlation.Between(real.data = exp.data, pred.data = sub.data,
method = "pearson",sd.use = NA,z.transform = TRUE,grouped = TRUE)
## Warning: package 'bindrcpp' was built under R version 3.4.4
plot.Correlation.Between(result.bM.pearson$coefficient, method="pearson")
# result.pCor <- eval.Partial.Correlation(real.data = exp.data, pred.data = sub.data, method = "spearman")
# plot.Correlation.Between(result.bM.spearman$coefficient, method="Spearman")
length(exp.data.s$value)
## [1] 219
total = cbind(real = exp.data.s$value,sub.data.s$value)
Plot.PCA(na.omit(total), labels=F, legend=TRUE)
# uniqueness as adj.r^2 difference between total linear model and linear models without certain group
result.uniq = eval.uniqueness(real.data = exp.data, pred.data = sub.data)
result.uniq
## uniqueness
## 1 -0.0035299825
## 2 -0.0018514005
## 3 0.0072155217
## 4 -0.0019861941
## 5 0.0042271752
## 6 0.0014769835
## 7 -0.0013198760
## 8 -0.0006020648
## 9 0.0036086216
plot.uniqueness(result.uniq, method="")
result.uniq.s = eval.uniqueness(real.data = exp.data.s, pred.data = sub.data.s)
sub.data.s$group$group
## [1] 1 1 2 3 3 4 4 5 6 6 6 6 7 8 9 9
plot.uniqueness(result.uniq.s, method="")
For Correlation-based Evaluation, provide mean, CI, and median pval
# 1. Render coefficient value
boot.result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson", sd.use = NA,z.transform = TRUE,boot = T)
head(boot.result.cor.pearson)
## avg low_ci high_ci sd
## Group_39-prediction_file-1 0.3159587 0.21933427 0.4256732 0.06414406
## Group_39-prediction_file-2 0.3120020 0.21528626 0.4206262 0.06283910
## Group_40-prediction_file-1 0.2848922 0.19129022 0.3853243 0.05991413
## Group_41-prediction_file-1 0.3653808 0.27890482 0.4500688 0.05126986
## Group_41-prediction_file-2 0.3653808 0.27890482 0.4500688 0.05126986
## Group_42-prediction_file-1 0.1311390 0.01760134 0.2586587 0.07550710
## p.value
## Group_39-prediction_file-1 2.668560e-06
## Group_39-prediction_file-2 3.399519e-06
## Group_40-prediction_file-1 2.732385e-05
## Group_41-prediction_file-1 2.218511e-08
## Group_41-prediction_file-2 2.218511e-08
## Group_42-prediction_file-1 6.216560e-02
# 2. Plot Correlation
plot.Correlation(boot.result.cor.pearson, "Pearson",boot = TRUE)
For RMSD-based Evaluation
# 1. Render coefficient value
result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson", sd.use = NA,z.transform = TRUE)
head(result.cor.pearson)
## pearson.coefficient.n=219 p.value
## Group_39-prediction_file-1 0.3165561 2.169579e-06
## Group_39-prediction_file-2 0.3123934 2.992671e-06
## Group_40-prediction_file-1 0.2796749 3.178925e-05
## Group_41-prediction_file-1 0.3675506 2.799291e-08
## Group_41-prediction_file-2 0.3675506 2.799291e-08
## Group_42-prediction_file-1 0.1334869 5.062460e-02
# 2. Plot Correlation
plot.Correlation(result.cor.pearson, "Pearson")
result.rmsd1 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = TRUE,boot = T)
head(result.rmsd1)
## RMSD low_ci high_ci sd
## Group_39-prediction_file-1 1.077589 0.9747741 1.184645 0.06489218
## Group_39-prediction_file-2 1.001295 0.9116030 1.097849 0.05719669
## Group_40-prediction_file-1 1.127197 1.0409296 1.223458 0.05720155
## Group_41-prediction_file-1 1.567284 1.3940711 1.756550 0.11342868
## Group_41-prediction_file-2 1.567284 1.3940711 1.756550 0.11342868
## Group_42-prediction_file-1 1.630662 1.4583752 1.801228 0.10278278
plot.RMSD(result.rmsd1, method="")
# with variance.normalization
result.rmsd2 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = TRUE,boot = T)
head(result.rmsd2)
## RMSD low_ci high_ci sd
## Group_39-prediction_file-1 1.099499 0.9941265 1.209010 0.06557091
## Group_39-prediction_file-2 1.023055 0.9290500 1.117659 0.05752699
## Group_40-prediction_file-1 1.145974 1.0572586 1.238049 0.05580197
## Group_41-prediction_file-1 1.578594 1.4014106 1.777544 0.11004123
## Group_41-prediction_file-2 1.578594 1.4014106 1.777544 0.11004123
## Group_42-prediction_file-1 1.630499 1.4693911 1.810466 0.10095696
plot.RMSD(result.rmsd2, method="")
# with variance.normalization
result.rmsd3 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = FALSE,boot = T)
head(result.rmsd3)
## RMSD low_ci high_ci sd
## Group_39-prediction_file-1 0.7391888 0.6719041 0.8039118 0.03901417
## Group_39-prediction_file-2 0.7055611 0.6410194 0.7671101 0.03995325
## Group_40-prediction_file-1 0.6079143 0.5474106 0.6713188 0.03665754
## Group_41-prediction_file-1 0.5675694 0.5260024 0.6066957 0.02454987
## Group_41-prediction_file-2 0.5675694 0.5260024 0.6066957 0.02454987
## Group_42-prediction_file-1 0.6366967 0.5786906 0.6980475 0.03634241
plot.RMSD(result.rmsd3, method="")
# with variance.normalization
result.rmsd4 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = FALSE,boot = T)
head(result.rmsd4)
## RMSD low_ci high_ci sd
## Group_39-prediction_file-1 0.7609928 0.6960201 0.8240467 0.03954468
## Group_39-prediction_file-2 0.7258915 0.6564268 0.7901490 0.04020173
## Group_40-prediction_file-1 0.6217584 0.5682989 0.6789840 0.03488304
## Group_41-prediction_file-1 0.5792009 0.5373457 0.6190344 0.02433503
## Group_41-prediction_file-2 0.5792009 0.5373457 0.6190344 0.02433503
## Group_42-prediction_file-1 0.6404705 0.5874168 0.6999961 0.03458017
plot.RMSD(result.rmsd4, method="")
For Uniqueness Evaluation
result.uniq = eval.uniqueness(real.data = exp.data, pred.data = sub.data,boot = TRUE)
result.uniq
## uniqueness low_ci high_ci sd
## 1 0.0008588146 -0.004117093 0.01558673 0.006794240
## 2 0.0017450875 -0.004095540 0.01830906 0.007176755
## 3 0.0104885219 -0.003393715 0.03487135 0.012783660
## 4 0.0026437797 -0.003987992 0.01989985 0.009027008
## 5 0.0069503033 -0.003842305 0.02707928 0.010213371
## 6 0.0067178900 -0.004039506 0.03145771 0.012257302
## 7 0.0019218868 -0.003991726 0.01654487 0.007345860
## 8 0.0035549520 -0.003949144 0.02356930 0.009393296
## 9 0.0065076136 -0.003900567 0.02681468 0.011319813
plot.uniqueness(result.uniq, method="",boot = TRUE)
result.uniq.s = eval.uniqueness(real.data = exp.data.s, pred.data = sub.data.s,boot = TRUE)
result.uniq.s
## uniqueness low_ci high_ci sd
## 1 0.0008539077 -0.003936736 0.01557216 0.006578812
## 2 0.0024231333 -0.003922784 0.02120193 0.008289418
## 3 0.0101985701 -0.003271067 0.03210386 0.011488873
## 4 0.0027188455 -0.003884709 0.02252701 0.009202256
## 5 0.0072641114 -0.003777335 0.02947057 0.011570498
## 6 0.0064521426 -0.003834966 0.02913687 0.011782762
## 7 0.0020569491 -0.003946284 0.01895548 0.007524355
## 8 0.0030298598 -0.003899808 0.01901317 0.008519997
## 9 0.0061782886 -0.003839796 0.02801771 0.010706875
plot.uniqueness(result.uniq.s, method="",boot = TRUE)